From 6ba37cfd00ec93a86851e2b729e6812f8364c576 Mon Sep 17 00:00:00 2001 From: "kaf24@scramble.cl.cam.ac.uk[kaf24]" Date: Wed, 14 Jan 2004 18:05:25 +0000 Subject: [PATCH] bitkeeper revision 1.673 (400584e59PLn5rFfGWkKhTGVKRl8tQ) maw-diff --- xen/drivers/block/xen_vbd.c | 30 +- xen/include/hypervisor-ifs/hypervisor-if.h | 2 + .../arch/xeno/drivers/block/xl_block.c | 100 ++- .../arch/xeno/drivers/block/xl_block.h | 13 + .../arch/xeno/drivers/block/xl_vbd.c | 643 +++++++++++++----- 5 files changed, 586 insertions(+), 202 deletions(-) diff --git a/xen/drivers/block/xen_vbd.c b/xen/drivers/block/xen_vbd.c index 13da02d03c..e5ffdfa016 100644 --- a/xen/drivers/block/xen_vbd.c +++ b/xen/drivers/block/xen_vbd.c @@ -14,6 +14,9 @@ #include #include +#include +#include + /* ** XXX SMH: the below probe functions /append/ their info to the ** xdi array; i.e. they assume that all earlier slots are correctly @@ -36,9 +39,10 @@ long vbd_create(vbd_create_t *create) struct task_struct *p; vbd_t *new_vbd, **pv; long ret = 0; + unsigned long cpu_mask; if ( unlikely(!IS_PRIV(current)) ) - return -EPERM; + return -EPERM; if ( unlikely((p = find_domain_by_id(create->domain)) == NULL) ) { @@ -77,6 +81,9 @@ long vbd_create(vbd_create_t *create) *pv = new_vbd; + cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); + guest_event_notify(cpu_mask); + out: spin_unlock(&p->vbd_lock); put_task_struct(p); @@ -91,6 +98,7 @@ long vbd_grow(vbd_grow_t *grow) xen_extent_le_t **px, *x; vbd_t *v; long ret = 0; + unsigned long cpu_mask; if ( unlikely(!IS_PRIV(current)) ) return -EPERM; @@ -132,6 +140,9 @@ long vbd_grow(vbd_grow_t *grow) *px = x; + cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); + guest_event_notify(cpu_mask); + out: spin_unlock(&p->vbd_lock); put_task_struct(p); @@ -145,6 +156,7 @@ long vbd_shrink(vbd_shrink_t *shrink) xen_extent_le_t **px, *x; vbd_t *v; long ret = 0; + unsigned long cpu_mask; if ( !IS_PRIV(current) ) return -EPERM; @@ -177,6 +189,9 @@ long vbd_shrink(vbd_shrink_t *shrink) *px = 
x->next; kfree(x); + cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); + guest_event_notify(cpu_mask); + out: spin_unlock(&p->vbd_lock); put_task_struct(p); @@ -192,6 +207,7 @@ long vbd_setextents(vbd_setextents_t *setextents) vbd_t *v; int i; long ret = 0; + unsigned long cpu_mask; if ( !IS_PRIV(current) ) return -EPERM; @@ -253,6 +269,9 @@ long vbd_setextents(vbd_setextents_t *setextents) /* Make the new list visible. */ v->extents = new_extents; + cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); + guest_event_notify(cpu_mask); + out: spin_unlock(&p->vbd_lock); put_task_struct(p); @@ -274,6 +293,7 @@ long vbd_delete(vbd_delete_t *delete) struct task_struct *p; vbd_t *v, **pv; xen_extent_le_t *x, *t; + unsigned long cpu_mask; if( !IS_PRIV(current) ) return -EPERM; @@ -314,6 +334,9 @@ long vbd_delete(vbd_delete_t *delete) x = t; } + cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); + guest_event_notify(cpu_mask); + spin_unlock(&p->vbd_lock); put_task_struct(p); return 0; @@ -325,6 +348,7 @@ void destroy_all_vbds(struct task_struct *p) int i; vbd_t *v; xen_extent_le_t *x, *t; + unsigned long cpu_mask; spin_lock(&p->vbd_lock); for ( i = 0; i < VBD_HTAB_SZ; i++ ) @@ -344,6 +368,10 @@ void destroy_all_vbds(struct task_struct *p) } } } + + cpu_mask = mark_guest_event(p, _EVENT_VBD_UPD); + guest_event_notify(cpu_mask); + spin_unlock(&p->vbd_lock); } diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index 145b1a0aac..ef29e292cb 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -93,6 +93,7 @@ #define EVENT_PS2 0x20 /* PS/2 keyboard or mouse event(s) */ #define EVENT_STOP 0x40 /* Prepare for stopping and possible pickling */ #define EVENT_EVTCHN 0x80 /* Event pending on an event channel */ +#define EVENT_VBD_UPD 0x100 /* Event to signal VBDs should be reprobed */ /* Bit offsets, as opposed to the above masks. 
*/ #define _EVENT_BLKDEV 0 @@ -103,6 +104,7 @@ #define _EVENT_PS2 5 #define _EVENT_STOP 6 #define _EVENT_EVTCHN 7 +#define _EVENT_VBD_UPD 8 /* * Virtual addresses beyond this are not modifiable by guest OSes. The diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c index 8271654f1d..d27797b41d 100644 --- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.c @@ -5,13 +5,21 @@ * */ +/* Some modifications to the original by Mark A. Williamson and (C) Intel + * Research Cambridge */ + #include "xl_block.h" #include #include +#include +#include + +#include typedef unsigned char byte; /* from linux/ide.h */ #define XLBLK_RESPONSE_IRQ _EVENT_BLKDEV +#define XLBLK_UPDATE_IRQ _EVENT_VBD_UPD #define DEBUG_IRQ _EVENT_DEBUG #define STATE_ACTIVE 0 @@ -24,12 +32,13 @@ static BLK_RING_IDX resp_cons; /* Response consumer for comms ring. */ static BLK_RING_IDX req_prod; /* Private request producer. */ #define XDI_MAX 64 -static xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */ +xen_disk_info_t xlblk_disk_info; /* information about our disks/VBDs */ /* We plug the I/O ring if the driver is suspended or if the ring is full. */ #define RING_PLUGGED (((req_prod - resp_cons) == BLK_RING_SIZE) || \ (state != STATE_ACTIVE)) + /* * Request queues with outstanding work, but ring is currently full. 
* We need no special lock here, as we always access this with the @@ -56,13 +65,6 @@ static inline void signal_requests_to_xen(void) return; } -static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) -{ - struct gendisk *gd = get_gendisk(xldev); - return (xl_disk_t *)gd->real_devices + - (MINOR(xldev) >> gd->minor_shift); -} - int xenolinux_block_open(struct inode *inode, struct file *filep) { @@ -97,7 +99,8 @@ int xenolinux_block_open(struct inode *inode, struct file *filep) return -ENODEV; /* no such device */ } } - + + /* RACE: need locking SMP / pre-emptive kernels */ disk->usage++; DPRINTK("xenolinux_block_open\n"); return 0; @@ -107,8 +110,18 @@ int xenolinux_block_open(struct inode *inode, struct file *filep) int xenolinux_block_release(struct inode *inode, struct file *filep) { xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev); - disk->usage--; + disk->usage--; /* RACE: need locking for SMP / pre-emptive kernels */ DPRINTK("xenolinux_block_release\n"); + + /* A reference to a disk has been dropped: may enable more changes to VBDs to + * go through (currently don't do any updates while references are held), so + * we run the update magic again. Could equally well schedule this update for + * keventd to run, or use a flag so we only update at this point if we think + * something (relevant) may have changed. 
+ * Keventd has the advantage that it'll serialise executions of this function + * - there's a race here for SMP / pre-emptive kernels */ + xlvbd_update_vbds(); + return 0; } @@ -136,7 +149,7 @@ int xenolinux_block_ioctl(struct inode *inode, struct file *filep, return put_user(part->nr_sects, (unsigned long *) argument); case BLKRRPART: /* re-read partition table */ - DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); + DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART); return xenolinux_block_revalidate(dev); case BLKSSZGET: @@ -185,7 +198,7 @@ int xenolinux_block_ioctl(struct inode *inode, struct file *filep, return 0; default: - printk("ioctl %08x not supported by xl_block\n", command); + printk(KERN_ALERT "ioctl %08x not supported by xl_block\n", command); return -ENOSYS; } @@ -199,12 +212,15 @@ int xenolinux_block_check(kdev_t dev) return 0; } +/* MAW - leaving this as it is for now. As long as we're responding to the VBD + * update events from the hypervisor, I figure this will still do what it's + * meant to do :-) */ int xenolinux_block_revalidate(kdev_t dev) { struct gendisk *gd = get_gendisk(dev); xl_disk_t *disk = xldev_to_xldisk(dev); unsigned long flags, capacity = gd->part[MINOR(dev)].nr_sects; - int i, disk_nr = MINOR(dev) >> gd->minor_shift; + int i, disk_nr = MINOR(dev) >> gd->minor_shift; DPRINTK("xenolinux_block_revalidate: %d\n", dev); @@ -223,6 +239,8 @@ int xenolinux_block_revalidate(kdev_t dev) } spin_unlock_irqrestore(&io_request_lock, flags); + /* RACE? 
is it OK that we give up the lock */ + for ( i = gd->max_p - 1; i >= 0; i-- ) { invalidate_device(dev+i, 1); @@ -231,7 +249,8 @@ int xenolinux_block_revalidate(kdev_t dev) gd->sizes[MINOR(dev+i)] = 0; } - /* XXX Should perhaps revalidate VBDs here */ + /* shouldn't need to revalidate VBDs here as it's done automatically when + * we get the VBD update event from Xen */ grok_partitions(gd, disk_nr, gd->max_p, capacity); @@ -345,8 +364,8 @@ void do_xlblk_request(request_queue_t *rq) struct request *req; struct buffer_head *bh, *next_bh; int rw, nsect, full, queued = 0; - - DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME); + + DPRINTK("xlblk.c::do_xlblk_request\n"); while ( !rq->plugged && !list_empty(&rq->queue_head)) { @@ -430,12 +449,49 @@ static void kick_pending_request_queues(void) } +/** + * do_update_vbds - called in process context by keventd to update VBDs + * @arg: dummy argument to fit schedule_task API + * + * When this function is run, it simply calls through to xlvbd_update_vbds to + * update the VBD state information. The argument is ignored - it's only there + * because the API for scheduling with keventd requires it. + */ +void do_update_vbds(void * arg) +{ + DPRINTK("xl_block.c::do_update_vbds() - called\n"); + xlvbd_update_vbds(); +} + +/* this data is needed to register do_update_vbds() as a task for keventd */ +static struct tq_struct update = { + .sync = 0, + .routine = do_update_vbds, + .data = 0 +}; + +/** + * xlblk_update_int - handle VBD update events from Xen + * + * This function schedules a task for keventd to run, which will update the + * VBDs and perform the corresponding updates to our view of VBD state, so that + * XenoLinux will respond to changes / additions / deletions to the set of VBDs + * automatically. 
+ */ +static void xlblk_update_int(int irq, void *dev_id, struct pt_regs *ptregs) +{ + DPRINTK("xl_block.c::xlblk_update_int() - called\n"); + + schedule_task(&update); +} + + static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs) { BLK_RING_IDX i; unsigned long flags; struct buffer_head *bh, *next_bh; - + if ( unlikely(state == STATE_CLOSED) ) return; @@ -512,6 +568,15 @@ int __init xlblk_init(void) goto fail; } + error = request_irq(XLBLK_UPDATE_IRQ, xlblk_update_int, + SA_INTERRUPT, "blkdev", NULL); + + if ( error ) + { + printk(KERN_ALERT "Could not allocate block update interrupt\n"); + goto fail; + } + /* Setup our [empty] disk information structure */ xlblk_disk_info.max = XDI_MAX; xlblk_disk_info.disks = kmalloc(XDI_MAX * sizeof(xen_disk_t), GFP_KERNEL); @@ -548,6 +613,7 @@ static void __exit xlblk_cleanup(void) { xlvbd_cleanup(); free_irq(XLBLK_RESPONSE_IRQ, NULL); + free_irq(XLBLK_UPDATE_IRQ, NULL); } diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h index 9c6dcea522..d856dd7601 100644 --- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_block.h @@ -62,6 +62,19 @@ extern int xenolinux_block_check(kdev_t dev); extern int xenolinux_block_revalidate(kdev_t dev); extern void do_xlblk_request (request_queue_t *rq); +extern xen_disk_info_t xlblk_disk_info; /* this is really in xl_block.c */ +extern void xlvbd_update_vbds(void); /* this is really in xl_vbd.c */ + +static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev) +{ + struct gendisk *gd = get_gendisk(xldev); + + if(!gd) return NULL; + + return (xl_disk_t *)gd->real_devices + + (MINOR(xldev) >> gd->minor_shift); +} + /* Virtual block-device subsystem. 
*/ extern int xlvbd_init(xen_disk_info_t *xdi); diff --git a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c index d4e01f73d8..f9dec059aa 100644 --- a/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c +++ b/xenolinux-2.4.24-sparse/arch/xeno/drivers/block/xl_vbd.c @@ -5,6 +5,9 @@ * */ +/* Some modifications to the original by Mark A. Williamson and (C) Intel + * Research Cambridge */ + #include "xl_block.h" #include @@ -30,6 +33,10 @@ #define XLVBD_PARTN_SHIFT 6 /* amount to shift minor to get 'real' minor */ #define XLVBD_MAX_PART (1 << XLVBD_PARTN_SHIFT) /* minors per 'other' vbd */ +/* Used to record data in vbd_state[] and detect changes in configuration */ +#define VBD_NODEV 1 +#define VBD_KNOWN 2 + /* The below are for the generic drivers/block/ll_rw_block.c code. */ static int xlide_blksize_size[256]; static int xlide_hardsect_size[256]; @@ -50,6 +57,345 @@ static struct block_device_operations xlvbd_block_fops = revalidate: xenolinux_block_revalidate, }; + /* hold state for all possible VBDs for use in handling updates */ +static char vbd_state[65536]; + +/** + * xlvbd_init_device - initialise a VBD device + * @disk: a xen_disk_t describing the VBD + * + * Takes a xen_disk_t * that describes a VBD the domain has access to. + * Performs appropriate initialisation and registration of the device. + * + * Care needs to be taken when making re-entrant calls to ensure that + * corruption does not occur. Also, devices that are in use should not have + * their details updated. This is the caller's responsibility. + */ +int xlvbd_init_device(xen_disk_t *disk) +{ + int device = disk->device; + int major = MAJOR(device); + int minor = MINOR(device); + int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ + int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ + int partno; + char * major_name; + int max_part; + + struct gendisk *gd; + int result; + int j; + + unsigned char buf[64]; + + if ( is_ide ) + { + major_name = XLIDE_MAJOR_NAME; + max_part = XLIDE_MAX_PART; + } + else if ( is_scsi ) + { + major_name = XLSCSI_MAJOR_NAME; + max_part = XLSCSI_MAX_PART; + } + else + { + major_name = XLVBD_MAJOR_NAME; + max_part = XLVBD_MAX_PART; + } + + partno = minor & (max_part - 1); + + if ( (gd = get_gendisk(device)) == NULL ) + { + result = register_blkdev(major, major_name, &xlvbd_block_fops); + if ( result < 0 ) + { + printk(KERN_ALERT "XL VBD: can't get major %d\n", major); + return -1; /* XXX make this sane one day */ + } + + if ( is_ide ) + { + blksize_size[major] = xlide_blksize_size; + hardsect_size[major] = xlide_hardsect_size; + max_sectors[major] = xlide_max_sectors; + read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ + } + else if ( is_scsi ) + { + blksize_size[major] = xlscsi_blksize_size; + hardsect_size[major] = xlscsi_hardsect_size; + max_sectors[major] = xlscsi_max_sectors; + read_ahead[major] = 0; /* XXX 8; -- guessing */ + } + else + { + blksize_size[major] = xlvbd_blksize_size; + hardsect_size[major] = xlvbd_hardsect_size; + max_sectors[major] = xlvbd_max_sectors; + read_ahead[major] = 8; + } + + blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); + + /* + * Turn off barking 'headactive' mode. We dequeue buffer heads as + * soon as we pass them down to Xen. + */ + blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); + + /* Construct an appropriate gendisk structure. 
*/ + gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); + gd->major = major; + gd->major_name = major_name; + + gd->max_p = max_part; + if ( is_ide ) + { + gd->minor_shift = XLIDE_PARTN_SHIFT; + gd->nr_real = XLIDE_DEVS_PER_MAJOR; + } + else if ( is_scsi ) + { + gd->minor_shift = XLSCSI_PARTN_SHIFT; + gd->nr_real = XLSCSI_DEVS_PER_MAJOR; + } + else + { + gd->minor_shift = XLVBD_PARTN_SHIFT; + gd->nr_real = XLVBD_DEVS_PER_MAJOR; + } + + /* + ** The sizes[] and part[] arrays hold the sizes and other + ** information about every partition with this 'major' (i.e. + ** every disk sharing the 8 bit prefix * max partns per disk) + */ + gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); + gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), + GFP_KERNEL); + memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); + memset(gd->part, 0, max_part * gd->nr_real + * sizeof(struct hd_struct)); + + + gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), + GFP_KERNEL); + memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); + + gd->next = NULL; + gd->fops = &xlvbd_block_fops; + + gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), + GFP_KERNEL); + gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); + + memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); + memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); + + add_gendisk(gd); + + blk_size[major] = gd->sizes; + } + + if ( XD_READONLY(disk->info) ) + set_device_ro(device, 1); + + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO; + + if ( partno != 0 ) + { + /* + * If this was previously set up as a real disc we will have set + * up partition-table information. Virtual partitions override + * 'real' partitions, and the two cannot coexist on a device. 
+ */ + if ( gd->sizes[minor & ~(max_part-1)] != 0 ) + { + kdev_t dev = device & ~(max_part-1); + for ( j = max_part - 1; j >= 0; j-- ) + { + invalidate_device(dev+j, 1); + gd->part[MINOR(dev+j)].start_sect = 0; + gd->part[MINOR(dev+j)].nr_sects = 0; + gd->sizes[MINOR(dev+j)] = 0; + + vbd_state[dev+j] &= ~VBD_KNOWN; + } + printk(KERN_ALERT + "Virtual partitions found for /dev/%s - ignoring any " + "real partition information we may have found.\n", + disk_name(gd, MINOR(device), buf)); + } + + /* Need to skankily setup 'partition' information */ + gd->part[minor].start_sect = 0; + gd->part[minor].nr_sects = disk->capacity; + gd->sizes[minor] = disk->capacity; + + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; + + vbd_state[device] |= VBD_KNOWN; + } + else + { + /* Some final fix-ups depending on the device type */ + switch ( XD_TYPE(disk->info) ) + { + case XD_TYPE_CDROM: + case XD_TYPE_FLOPPY: + case XD_TYPE_TAPE: + gd->part[minor].nr_sects = disk->capacity; + gd->sizes[minor] = disk->capacity>>(BLOCK_SIZE_BITS-9); + gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; + printk(KERN_ALERT + "Skipping partition check on %s /dev/%s\n", + XD_TYPE(disk->info)==XD_TYPE_CDROM ? "cdrom" : + (XD_TYPE(disk->info)==XD_TYPE_TAPE ? "tape" : + "floppy"), disk_name(gd, MINOR(device), buf)); + + vbd_state[device] |= VBD_KNOWN; /* remember the VBD is there now */ + break; + + case XD_TYPE_DISK: + /* Only check partitions on real discs (not virtual!). 
*/ + if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) + { + printk(KERN_ALERT + "Skipping partition check on virtual /dev/%s\n", + disk_name(gd, MINOR(device), buf)); + break; + } + register_disk(gd, device, gd->max_p, &xlvbd_block_fops, + disk->capacity); + + vbd_state[device] |= VBD_KNOWN; /* remember the VBD is there now */ + + break; + + default: + printk(KERN_ALERT "XenoLinux: unknown device type %d\n", + XD_TYPE(disk->info)); + break; + } + } + + printk(KERN_ALERT "XenoLinux Virtual Block Device Driver " + "installed [device: %04x]\n", device); + + return 0; +} + + +/** + * xlvbd_remove - see if a VBD should be removed and do so if appropriate + * @device: numeric device ID + * + * Updates the gendisk structure and invalidates devices. + * + * This is OK for now but in future, should perhaps consider where this should + * deallocate gendisks / unregister devices? + */ +int xlvbd_remove(int device) +{ + int major = MAJOR(device); + int minor = MINOR(device); + int is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ + int is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ + int i; /* loop counter */ + int partno; + int max_part; + char * major_name; + + struct gendisk *gd; + + DPRINTK("xl_vbd.c::xlvbd_remove() - Removing a VBD\n"); + + /* if device is in use then we shouldn't change its settings */ + if(xldev_to_xldisk(device)->usage) + { + DPRINTK("xl_vbd.c::xlvbd_remove() - VBD in use, could not remove\n"); + printk(KERN_ALERT "Removing XenoLinux VBD failed - " + "in use [device: %x]\n", device); + return -1; + } + + if((gd = get_gendisk(device)) == NULL) + { + printk(KERN_ALERT + "xl_vbd.c::xlvbd_remove() - ERROR could not get gendisk\n"); + + return -1; + } + + if ( is_ide ) + { + major_name = XLIDE_MAJOR_NAME; + max_part = XLIDE_MAX_PART; + } + else if ( is_scsi ) + { + major_name = XLSCSI_MAJOR_NAME; + max_part = XLSCSI_MAX_PART; + } + else + { + major_name = XLVBD_MAJOR_NAME; + max_part = XLVBD_MAX_PART; + } + + partno = minor & (max_part - 1); + + DPRINTK("Got partno = 0x%x\n", partno); + + if(partno) /* if the VBD is mapped to a "partition" device node in Linux */ + { + int should_clear_virtpart = 1; /* if this is set true we should clear + * the GENHD_FL_VIRT_PARTNS flag in the + * gendisk */ + + gd->sizes[minor] = 0; + + for(i = 0; i < max_part; i++) + if(gd->sizes[minor - partno + i]) should_clear_virtpart = 0; + + /* if there aren't any virtual partitions here then clear the flag for + * this unit */ + if(should_clear_virtpart) + { + gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS; + + DPRINTK("xl_vbd.c::xlvbd_remove() - " + "cleared virtual partition flag\n"); + } + + gd->part[MINOR(device)].start_sect = 0; + gd->part[MINOR(device)].nr_sects = 0; + gd->sizes[MINOR(device)] = 0; + + invalidate_device(device, 1); + + vbd_state[device] &= ~VBD_KNOWN; /* forget VBD was ever there */ + } + else /* the VBD is mapped to a "whole disk drive" device node in Linux */ + { + for ( i = max_part - 1; i >= 0; i-- ) + { + invalidate_device(device+i, 1); + gd->part[MINOR(device+i)].start_sect = 0; + 
gd->part[MINOR(device+i)].nr_sects = 0; + gd->sizes[MINOR(device+i)] = 0; + + vbd_state[device+i] &= ~VBD_KNOWN; /* forget VBD was ever there */ + } + } + + printk(KERN_ALERT "XenoLinux Virtual Block Device removed " + " [device: %04x]\n", device); + return 0; +} + /* * Set up all the linux device goop for the virtual block devices (vbd's) that * xen tells us about. Note that although from xen's pov VBDs are addressed @@ -60,17 +406,15 @@ static struct block_device_operations xlvbd_block_fops = */ int __init xlvbd_init(xen_disk_info_t *xdi) { - int i, j, result, max_part; - struct gendisk *gd = NULL; - kdev_t device; - unsigned short major, minor, partno; - int is_ide, is_scsi; - char *major_name; - unsigned char buf[64]; - + int i; /* loop counter */ + SET_MODULE_OWNER(&xlvbd_block_fops); /* Initialize the global arrays. */ + + for( i = 0; i < 65536; i++) + vbd_state[i] = VBD_NODEV; + for ( i = 0; i < 256; i++ ) { /* from the generic ide code (drivers/ide/ide-probe.c, etc) */ @@ -100,201 +444,129 @@ int __init xlvbd_init(xen_disk_info_t *xdi) */ for ( i = 0; i < xdi->count; i++ ) { - device = xdi->disks[i].device; - major = MAJOR(device); - minor = MINOR(device); - is_ide = IDE_DISK_MAJOR(major); /* is this an ide device? */ - is_scsi= SCSI_BLK_MAJOR(major); /* is this a scsi device? 
*/ - - if ( is_ide ) - { - major_name = XLIDE_MAJOR_NAME; - max_part = XLIDE_MAX_PART; - } - else if ( is_scsi ) - { - major_name = XLSCSI_MAJOR_NAME; - max_part = XLSCSI_MAX_PART; - } - else - { - major_name = XLVBD_MAJOR_NAME; - max_part = XLVBD_MAX_PART; - } + xlvbd_init_device(&xdi->disks[i]); + } - partno = minor & (max_part - 1); + return 0; +} - if ( (gd = get_gendisk(device)) == NULL ) - { - result = register_blkdev(major, major_name, &xlvbd_block_fops); - if ( result < 0 ) - { - printk(KERN_ALERT "XL VBD: can't get major %d\n", major); - continue; - } +/** + * xlvbd_update_vbds - reprobes the VBD status and updates driver state + * + * The VBDs need to be updated in this way when the domain is initialised and + * also each time we receive an XLBLK_UPDATE event. + * + * The vbd_state array is consistent on entry to and exit from this function but + * not whilst the function runs, so this should not be called re-entrantly. + */ +void xlvbd_update_vbds(void) +{ + int i; /* loop counter */ + int ret; /* return values */ + block_io_op_t op; /* for talking to Xen */ - if ( is_ide ) - { - blksize_size[major] = xlide_blksize_size; - hardsect_size[major] = xlide_hardsect_size; - max_sectors[major] = xlide_max_sectors; - read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */ - } - else if ( is_scsi ) - { - blksize_size[major] = xlscsi_blksize_size; - hardsect_size[major] = xlscsi_hardsect_size; - max_sectors[major] = xlscsi_max_sectors; - read_ahead[major] = 0; /* XXX 8; -- guessing */ - } - else - { - blksize_size[major] = xlvbd_blksize_size; - hardsect_size[major] = xlvbd_hardsect_size; - max_sectors[major] = xlvbd_max_sectors; - read_ahead[major] = 8; - } + xen_disk_info_t *xdi = &xlblk_disk_info; /* pointer to structures in + * xl_block.c */ - blk_init_queue(BLK_DEFAULT_QUEUE(major), do_xlblk_request); + /* Probe for disk information. 
*/ + memset(&op, 0, sizeof(op)); + op.cmd = BLOCK_IO_OP_VBD_PROBE; + op.u.probe_params.domain = 0; + + xdi->count = 0; /* need to keep resetting this to zero because the probe + * will append results after "used" space in the array */ - /* - * Turn off barking 'headactive' mode. We dequeue buffer heads as - * soon as we pass them down to Xen. - */ - blk_queue_headactive(BLK_DEFAULT_QUEUE(major), 0); + memcpy(&op.u.probe_params.xdi, &xlblk_disk_info, sizeof(xlblk_disk_info)); - /* Construct an appropriate gendisk structure. */ - gd = kmalloc(sizeof(struct gendisk), GFP_KERNEL); - gd->major = major; - gd->major_name = major_name; + ret = HYPERVISOR_block_io_op(&op); - gd->max_p = max_part; - if ( is_ide ) - { - gd->minor_shift = XLIDE_PARTN_SHIFT; - gd->nr_real = XLIDE_DEVS_PER_MAJOR; - } - else if ( is_scsi ) - { - gd->minor_shift = XLSCSI_PARTN_SHIFT; - gd->nr_real = XLSCSI_DEVS_PER_MAJOR; - } - else - { - gd->minor_shift = XLVBD_PARTN_SHIFT; - gd->nr_real = XLVBD_DEVS_PER_MAJOR; - } + if ( ret ) + { + printk(KERN_ALERT "Could not probe disks (%d)\n", ret); + } - /* - ** The sizes[] and part[] arrays hold the sizes and other - ** information about every partition with this 'major' (i.e. 
- ** every disk sharing the 8 bit prefix * max partns per disk) - */ - gd->sizes = kmalloc(max_part*gd->nr_real*sizeof(int), GFP_KERNEL); - gd->part = kmalloc(max_part*gd->nr_real*sizeof(struct hd_struct), - GFP_KERNEL); - memset(gd->sizes, 0, max_part * gd->nr_real * sizeof(int)); - memset(gd->part, 0, max_part * gd->nr_real - * sizeof(struct hd_struct)); - - - gd->real_devices = kmalloc(gd->nr_real * sizeof(xl_disk_t), - GFP_KERNEL); - memset(gd->real_devices, 0, gd->nr_real * sizeof(xl_disk_t)); - - gd->next = NULL; - gd->fops = &xlvbd_block_fops; - - gd->de_arr = kmalloc(gd->nr_real * sizeof(*gd->de_arr), - GFP_KERNEL); - gd->flags = kmalloc(gd->nr_real * sizeof(*gd->flags), GFP_KERNEL); - - memset(gd->de_arr, 0, gd->nr_real * sizeof(*gd->de_arr)); - memset(gd->flags, 0, gd->nr_real * sizeof(*gd->flags)); + /* copy back the [updated] count parameter */ + xlblk_disk_info.count = op.u.probe_params.xdi.count; - add_gendisk(gd); + DPRINTK("Retrieved %d disks\n",op.u.probe_params.xdi.count); + + + for( i = 0; i < 65536; i++ ) + vbd_state[i] |= VBD_NODEV; + + for( i = 0; i < xdi->count; i++ ) + { + int device = xdi->disks[i].device; + xl_disk_t *d; - blk_size[major] = gd->sizes; - } + vbd_state[device] &= ~VBD_NODEV; - if ( XD_READONLY(xdi->disks[i].info) ) - set_device_ro(device, 1); + DPRINTK("Inspecting xen_disk_t: device = %hx, info = %hx, " + "capacity = %lx, domain = %d\n", + xdi->disks[i].device, xdi->disks[i].info, xdi->disks[i].capacity, + xdi->disks[i].domain); - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_XENO; - - if ( partno != 0 ) + if(xdi->disks[i].info & XD_FLAG_VIRT) { - /* - * If this was previously set up as a real disc we will have set - * up partition-table information. Virtual partitions override - * 'real' partitions, and the two cannot coexist on a device. 
- */ - if ( gd->sizes[minor & ~(max_part-1)] != 0 ) - { - kdev_t dev = device & ~(max_part-1); - for ( j = max_part - 1; j >= 0; j-- ) - { - invalidate_device(dev+j, 1); - gd->part[MINOR(dev+j)].start_sect = 0; - gd->part[MINOR(dev+j)].nr_sects = 0; - gd->sizes[MINOR(dev+j)] = 0; - } - printk(KERN_ALERT - "Virtual partitions found for /dev/%s - ignoring any " - "real partition information we may have found.\n", - disk_name(gd, MINOR(device), buf)); - } + /* RACE: need to fix this for SMP / pre-emptive kernels */ - /* Need to skankily setup 'partition' information */ - gd->part[minor].start_sect = 0; - gd->part[minor].nr_sects = xdi->disks[i].capacity; - gd->sizes[minor] = xdi->disks[i].capacity; + d = xldev_to_xldisk(device); - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS; + /* only go on to monkey with this stuff if we successfully got the + * xldisk and it says no-one else is using the disk OR if we didn't + * successfully retrieve the xldisk (so it doesn't exist and nobody + * can be using it), otherwise skip on to the next device */ + if(d != NULL && d->usage > 0) + { + printk(KERN_ALERT "XenoLinux VBD Driver: " + "skipping update in a disk currently in use"); + DPRINTK("Usage = %d\n", d->usage); + continue; /* skip to next device */ + } + + printk(KERN_ALERT "XenoLinux VBD Driver: updating a VBD " + "[device: %x]\n", device); + /* also takes care of any overrides (i.e. 
due to VBDs mapped to + * partitions overriding VBDs mapped to disks) and of registering + * disks */ + xlvbd_init_device(xdi->disks + i); } - else + + } + + for( i = 0; i < 65536; i++ ) + { + switch(vbd_state[i]) { - /* Some final fix-ups depending on the device type */ - switch ( XD_TYPE(xdi->disks[i].info) ) - { - case XD_TYPE_CDROM: - case XD_TYPE_FLOPPY: - case XD_TYPE_TAPE: - gd->part[minor].nr_sects = xdi->disks[i].capacity; - gd->sizes[minor] = xdi->disks[i].capacity>>(BLOCK_SIZE_BITS-9); - gd->flags[minor >> gd->minor_shift] |= GENHD_FL_REMOVABLE; - printk(KERN_ALERT - "Skipping partition check on %s /dev/%s\n", - XD_TYPE(xdi->disks[i].info)==XD_TYPE_CDROM ? "cdrom" : - (XD_TYPE(xdi->disks[i].info)==XD_TYPE_TAPE ? "tape" : - "floppy"), disk_name(gd, MINOR(device), buf)); - break; - - case XD_TYPE_DISK: - /* Only check partitions on real discs (not virtual!). */ - if ( gd->flags[minor>>gd->minor_shift] & GENHD_FL_VIRT_PARTNS ) - { - printk(KERN_ALERT - "Skipping partition check on virtual /dev/%s\n", - disk_name(gd, MINOR(device), buf)); - break; - } - register_disk(gd, device, gd->max_p, &xlvbd_block_fops, - xdi->disks[i].capacity); - break; - - default: - printk(KERN_ALERT "XenoLinux: unknown device type %d\n", - XD_TYPE(xdi->disks[i].info)); - break; - } + case VBD_NODEV | VBD_KNOWN: /* a VBD we knew about before has gone */ + + DPRINTK("About to remove VBD 0x%x\n",i); + + ret = xlvbd_remove(i); + + if(ret) DPRINTK("Failed to remove VBD\n"); + + break; + + case VBD_NODEV: /* there's nothing here and there wasn't anything + * before */ + break; + + case VBD_KNOWN: /* the device is present and it's set up */ + break; + + case 0: /* there's a device present we haven't set up - either + * one of the "non virtual" VBDs or we weren't able to + * update it because it was mounted */ + break; + + default: /* if there's any other weird combination, something + * unexpected is happening */ + printk(KERN_ALERT "xl_vbd.c::xlvbd_update_vbds: BUG - Unknown state " + 
"when updating VBDs: 0x%x\n", vbd_state[i]); } - - printk(KERN_ALERT "XenoLinux Virtual Block Device Driver " - "installed [device: %04x]\n", device); } - return 0; } void xlvbd_cleanup(void) @@ -333,7 +605,10 @@ void xlvbd_cleanup(void) if ( unregister_blkdev(major, major_name) != 0 ) printk(KERN_ALERT "XenoLinux Virtual Block Device Driver:" - "major device %04x uninstalled w/ errors\n", major); + "major device %04x uninstalled w/ errors\n", major); + + /* XXX shouldn't we remove the gendisk from the kernel linked list and + * deallocate the memory here? */ } } -- 2.30.2